{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# numpy\n",
    "\n",
    "* 数値計算や配列計算をサポートする拡張モジュール"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ndarray(numpy配列)\n",
    "\n",
    "* np.asarray関数で取得する"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 2, 3, 4, 5]\n",
      "<class 'numpy.ndarray'>\n",
      "[1 2 3 4 5]\n",
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Start from a plain Python list.\n",
    "num_list = [1, 2, 3, 4, 5]\n",
    "print(num_list)\n",
    "\n",
    "# np.asarray builds an ndarray from the list.\n",
    "nd_ary = np.asarray(num_list)\n",
    "print(type(nd_ary))\n",
    "print(nd_ary)\n",
    "\n",
    "# Iterating over a 1-D ndarray yields its elements one at a time.\n",
    "for element in nd_ary:\n",
    "    print(element)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2 3]\n",
      " [4 5 6]\n",
      " [7 8 9]]\n",
      "[1 2 3]\n",
      "[4 5 6]\n",
      "[7 8 9]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "# A nested list becomes a two-dimensional ndarray.\n",
    "num_list = [\n",
    "    [1, 2, 3],\n",
    "    [4, 5, 6],\n",
    "    [7, 8, 9],\n",
    "]\n",
    "nd_ary = np.asarray(num_list)\n",
    "print(nd_ary)\n",
    "\n",
    "# Looping works here too: each iteration yields one row.\n",
    "for row in nd_ary:\n",
    "    print(row)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ndarrayをリストに変換\n",
    "\n",
    "* tolistメソッドを利用\n",
    "    * リスト ↔ numpy配列 の変換はよく使う"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2 3]\n",
      " [4 5 6]\n",
      " [7 8 9]]\n",
      "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "num_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]\n",
    "nd_ary = np.asarray(num_list)\n",
    "print(nd_ary)\n",
    "\n",
    "# tolist() turns the ndarray back into nested plain Python lists.\n",
    "to_list = nd_ary.tolist()\n",
    "print(to_list)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 型を指定して配列を取得\n",
    "\n",
    "* uint(符号なし整数)やcomplexなども指定できる"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2 3]\n",
      " [4 5 6]\n",
      " [7 8 9]]\n",
      "[[1. 2. 3.]\n",
      " [4. 5. 6.]\n",
      " [7. 8. 9.]]\n",
      "[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "num_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]\n",
    "\n",
    "# The dtype argument selects the element type of the resulting ndarray.\n",
    "int_ary = np.asarray(num_list, dtype=np.int64)\n",
    "float_ary = np.asarray(num_list, dtype=np.float64)\n",
    "\n",
    "print(int_ary)\n",
    "print(float_ary)\n",
    "# Converting back to a list keeps the float element type.\n",
    "print(float_ary.tolist())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 型を変更\n",
    "\n",
    "* astypeメソッドを利用する\n",
    "* 破壊的メソッドではなく変更したものが返される"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2 3]\n",
      " [4 5 6]\n",
      " [7 8 9]]\n",
      "[[1. 2. 3.]\n",
      " [4. 5. 6.]\n",
      " [7. 8. 9.]]\n",
      "[[1 2 3]\n",
      " [4 5 6]\n",
      " [7 8 9]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "num_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]\n",
    "nd_ary = np.asarray(num_list)\n",
    "print(nd_ary)\n",
    "\n",
    "# astype returns a converted array ...\n",
    "as_float = nd_ary.astype(np.float64)\n",
    "print(as_float)\n",
    "\n",
    "# ... so nd_ary itself still has its original element type.\n",
    "print(nd_ary)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 配列初期化\n",
    "\n",
    "* empty\n",
    "    * 初期化しない\n",
    "        * 値は都度異なる\n",
    "        * 初期化しなので生成は高速\n",
    "        * 初期化済みの場合は何もしない\n",
    "* zeros\n",
    "    * 0.0で初期化\n",
    "* ones\n",
    "    * 1.0で初期化\n",
    "* arange\n",
    "    * 連番で初期化\n",
    "* full\n",
    "    * 任意の値で初期化\n",
    "* identity\n",
    "    * 行列同じの正方形で初期化\n",
    "* diag\n",
    "    * 対角要素の取得"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[6.95332146e-310 0.00000000e+000 0.00000000e+000 0.00000000e+000\n",
      " 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000]\n",
      "[[6.95332149e-310 6.92298238e-310 0.00000000e+000]\n",
      " [0.00000000e+000 0.00000000e+000 0.00000000e+000]\n",
      " [0.00000000e+000 0.00000000e+000 0.00000000e+000]\n",
      " [0.00000000e+000 0.00000000e+000 0.00000000e+000]]\n",
      "[0. 0. 0. 0. 0.]\n",
      "[[0. 0. 0.]\n",
      " [0. 0. 0.]]\n",
      "[1. 1. 1. 1. 1. 1.]\n",
      "[[1. 1. 1.]\n",
      " [1. 1. 1.]\n",
      " [1. 1. 1.]]\n",
      "[2.  2.5 3.  3.5 4.  4.5]\n",
      "[[7 7]\n",
      " [7 7]]\n",
      "[[1. 0. 0.]\n",
      " [0. 1. 0.]\n",
      " [0. 0. 1.]]\n",
      "[[1 2 3]\n",
      " [4 5 6]\n",
      " [7 8 9]]\n",
      "[1 5 9]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "# empty: allocated without initialization, so the printed values are arbitrary.\n",
    "print(np.empty(8))\n",
    "print(np.empty([4, 3]))\n",
    "\n",
    "# zeros / ones: filled with 0.0 / 1.0.\n",
    "print(np.zeros(5))\n",
    "print(np.zeros([2, 3]))\n",
    "print(np.ones(6))\n",
    "print(np.ones([3, 3]))\n",
    "\n",
    "# arange: values from 2 up to (not including) 5 in steps of 0.5.\n",
    "print(np.arange(2, 5, 0.5))\n",
    "\n",
    "# full: every element set to the given value.\n",
    "print(np.full((2, 2), 7))\n",
    "\n",
    "# identity: square identity matrix of the given size.\n",
    "print(np.identity(3))\n",
    "\n",
    "# diag: for 2-D input, returns the diagonal elements.\n",
    "num_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]\n",
    "print(np.asarray(num_list))\n",
    "print(np.diag(num_list))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 型を指定して初期化\n",
    "\n",
    "* dtypeで指定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ True False False False  True False False False  True]\n",
      "[[0 0 0]\n",
      " [0 0 0]]\n",
      "[[1 1 1]\n",
      " [1 1 1]\n",
      " [1 1 1]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "# dtype= chooses the element type at creation time.\n",
    "# np.bool_ is NumPy's boolean scalar type. The bare np.bool alias was\n",
    "# deprecated in NumPy 1.20 and removed in 1.24, so it is avoided here.\n",
    "print(np.empty(9, dtype=np.bool_))  # uninitialized: contents are arbitrary\n",
    "print(np.zeros([2, 3], dtype=np.int64))\n",
    "print(np.ones([3, 3], dtype=np.int64))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ndarrayの構造確認\n",
    "\n",
    "* ndim\n",
    "    * 次元数\n",
    "* size\n",
    "    * 要素数\n",
    "* shape\n",
    "    * 次元毎の要素数\n",
    "* nbytes\n",
    "    * 全体のサイズ(バイト数)\n",
    "* dtype\n",
    "    * 型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[[0 0 0 0]\n",
      "  [0 0 0 0]]\n",
      "\n",
      " [[0 0 0 0]\n",
      "  [0 0 0 0]]\n",
      "\n",
      " [[0 0 0 0]\n",
      "  [0 0 0 0]]]\n",
      "3\n",
      "24\n",
      "(3, 2, 4)\n",
      "192\n",
      "int64\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "nd_ary = np.zeros((3, 2, 4), dtype=np.int64)\n",
    "print(nd_ary)\n",
    "\n",
    "# Print each structural attribute in turn: number of dimensions,\n",
    "# element count, per-dimension lengths, total bytes, element type.\n",
    "for attr_name in (\"ndim\", \"size\", \"shape\", \"nbytes\", \"dtype\"):\n",
    "    print(getattr(nd_ary, attr_name))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 行列の取り出し"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2 3]\n",
      " [4 5 6]\n",
      " [7 8 9]]\n",
      "[4 5 6]\n",
      "[2 5 8]\n",
      "5\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "num_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]\n",
    "nd_ary = np.asarray(num_list)\n",
    "print(nd_ary)\n",
    "\n",
    "# Second row: index 1 on the first axis, everything on the second.\n",
    "second_row = nd_ary[1, :]\n",
    "print(second_row)\n",
    "\n",
    "# Second column: everything on the first axis, index 1 on the second.\n",
    "second_col = nd_ary[:, 1]\n",
    "print(second_col)\n",
    "\n",
    "# Single element at row 2, column 2.\n",
    "center = nd_ary[1, 1]\n",
    "print(center)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## numpy演算\n",
    "\n",
    "* add\n",
    "    * 加算\n",
    "    * ndarray同士の演算が可能\n",
    "* subtract\n",
    "    * 減算\n",
    "    * ndarray同士の演算が可能\n",
    "* multiply\n",
    "    * 乗算\n",
    "    * ndarray同士の演算が可能\n",
    "* divide\n",
    "    * 除算\n",
    "    * ndarray同士の演算が可能\n",
    "* mod\n",
    "    * 剰余\n",
    "    * ndarray同士の演算が可能\n",
    "* power\n",
    "    * 累乗\n",
    "    * ndarray同士の演算が可能\n",
    "* 平方根\n",
    "    * sqrt\n",
    "* サイン\n",
    "    * sin\n",
    "* コサイン\n",
    "    * cos\n",
    "* タンジェント\n",
    "    * tan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8\n",
      "[ 8  9 14]\n",
      "[6 3 4]\n",
      "[ 7 18 45]\n",
      "[7.  2.  1.8]\n",
      "[0 0 4]\n",
      "[      1     729 1953125]\n",
      "1.4142135623730951\n",
      "1.0\n",
      "-1.0\n",
      "0.9999999999999999\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "nd_ary_1 = np.asarray([1, 3, 5])\n",
    "nd_ary_2 = np.asarray([7, 6, 9])\n",
    "\n",
    "# The arithmetic functions accept single values ...\n",
    "print(np.add(nd_ary_1[0], nd_ary_2[0]))\n",
    "\n",
    "# ... and work element-wise on whole ndarrays.\n",
    "print(np.add(nd_ary_1, nd_ary_2))\n",
    "print(np.subtract(nd_ary_2, nd_ary_1))\n",
    "print(np.multiply(nd_ary_1, nd_ary_2))\n",
    "print(np.divide(nd_ary_2, nd_ary_1))\n",
    "print(np.mod(nd_ary_2, nd_ary_1))\n",
    "print(np.power(nd_ary_1, nd_ary_2))\n",
    "\n",
    "# Square root and trigonometric functions (angles in radians).\n",
    "half_pi = np.pi / 2\n",
    "quarter_pi = np.pi / 4\n",
    "print(np.sqrt(2))\n",
    "print(np.sin(half_pi))\n",
    "print(np.cos(np.pi))\n",
    "print(np.tan(quarter_pi))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 統計関数\n",
    "\n",
    "* sum\n",
    "    * 合計値\n",
    "* mean\n",
    "    * 平均値\n",
    "* amax\n",
    "    * 最大値\n",
    "* amin\n",
    "    * 最小値\n",
    "* ptp\n",
    "    * 範囲 (最大値 - 最小値)\n",
    "* median\n",
    "    * 中央値\n",
    "* std\n",
    "    * 標準偏差"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "45\n",
      "5.0\n",
      "9\n",
      "1\n",
      "8\n",
      "5.0\n",
      "2.581988897471611\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "nd_ary = np.asarray([1, 2, 3, 4, 5, 6, 7, 8, 9])\n",
    "\n",
    "# Apply each statistic to the same data, in order: total, mean, maximum,\n",
    "# minimum, range (max - min), median, standard deviation.\n",
    "for stat_func in (np.sum, np.mean, np.amax, np.amin, np.ptp, np.median, np.std):\n",
    "    print(stat_func(nd_ary))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ndarrayのファイル入出力\n",
    "\n",
    "* 保存形式\n",
    "    * バイナリ形式\n",
    "        * np.save / np.load\n",
    "        * 軽い\n",
    "        * 拡張子は「.npy」\n",
    "    * テキスト形式\n",
    "        * np.savetxt / np.loadtxt\n",
    "        * 扱えるのは2次元まで"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2 3]\n",
      " [4 5 6]\n",
      " [7 8 9]]\n",
      "[[1 2 3]\n",
      " [4 5 6]\n",
      " [7 8 9]]\n",
      "[[1. 2. 3.]\n",
      " [4. 5. 6.]\n",
      " [7. 8. 9.]]\n",
      "[[1 2 3]\n",
      " [4 5 6]\n",
      " [7 8 9]]\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "# np.save / np.savetxt do not create missing directories, so make sure the\n",
    "# output directory exists before writing (e.g. on a fresh checkout).\n",
    "os.makedirs('files', exist_ok=True)\n",
    "\n",
    "num_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]\n",
    "nd_ary = np.asarray(num_list)\n",
    "print(nd_ary)\n",
    "\n",
    "# Binary format (.npy): the loaded array is still integer.\n",
    "np.save('files/nd_ary_bin.npy', nd_ary)\n",
    "nd_ary_bin = np.load('files/nd_ary_bin.npy')\n",
    "print(nd_ary_bin)\n",
    "\n",
    "# Text format with default settings: values are read back as floats.\n",
    "np.savetxt('files/nd_ary.txt', nd_ary)\n",
    "nd_ary_txt = np.loadtxt('files/nd_ary.txt')\n",
    "print(nd_ary_txt)\n",
    "\n",
    "# Integers: write with an integer format and read back with dtype=int.\n",
    "np.savetxt('files/nd_ary_int.txt', nd_ary, fmt='%d')\n",
    "nd_ary_txt_int = np.loadtxt('files/nd_ary_int.txt', dtype=int)\n",
    "print(nd_ary_txt_int)\n",
    "\n",
    "# Save in CSV form (comma-delimited).\n",
    "np.savetxt('files/nd_ary_int_csv.txt', nd_ary, fmt='%d', delimiter=\",\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "�NUMPY\u0001\u0000v\u0000{'descr': '<i8', 'fortran_order': False, 'shape': (3, 3), }                                                          \n",
      "\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0004\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0005\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0006\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0007\u0000\u0000\u0000\u0000\u0000\u0000\u0000\b\u0000\u0000\u0000\u0000\u0000\u0000\u0000\t\u0000\u0000\u0000\u0000\u0000\u0000\u00001.000000000000000000e+00 2.000000000000000000e+00 3.000000000000000000e+00\n",
      "4.000000000000000000e+00 5.000000000000000000e+00 6.000000000000000000e+00\n",
      "7.000000000000000000e+00 8.000000000000000000e+00 9.000000000000000000e+00\n",
      "1 2 3\n",
      "4 5 6\n",
      "7 8 9\n",
      "1,2,3\n",
      "4,5,6\n",
      "7,8,9\n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "# Print each saved file in turn; the .npy file is binary, the rest are text.\n",
    "for saved_file in files/nd_ary_bin.npy files/nd_ary.txt files/nd_ary_int.txt files/nd_ary_int_csv.txt; do\n",
    "    cat \"$saved_file\"\n",
    "done"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CSVの読み込みと解析例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the sample sales CSV: one row per date, one column per store.\n",
    "import csv\n",
    "import os\n",
    "\n",
    "header = ['日付', '店舗1', '店舗2', '店舗3']\n",
    "data = [\n",
    "    ['2018//6/1', 42520, 18373, 43522],\n",
    "    ['2018//6/2', 21660, 13211, 67534],\n",
    "    ['2018//6/3', 65261, 13822, 55988],\n",
    "    ['2018//6/4', 75428, 24358, 45327],\n",
    "    ['2018//6/5', 32987, 19654, 58750],\n",
    "]\n",
    "\n",
    "# open() does not create missing directories, so make sure csv/ exists.\n",
    "os.makedirs('csv', exist_ok=True)\n",
    "\n",
    "# newline='' is what the csv module documents for files handed to csv.writer;\n",
    "# it keeps the chosen line terminator from being translated on write.\n",
    "with open('csv/numpy_売上.csv', mode='w', encoding='utf-8', newline='') as fp:\n",
    "    csv_writer = csv.writer(fp, lineterminator=\"\\n\")\n",
    "    csv_writer.writerow(header)\n",
    "    csv_writer.writerows(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "日付,店舗1,店舗2,店舗3\n",
      "2018//6/1,42520,18373,43522\n",
      "2018//6/2,21660,13211,67534\n",
      "2018//6/3,65261,13822,55988\n",
      "2018//6/4,75428,24358,45327\n",
      "2018//6/5,32987,19654,58750\n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "# Print the sales CSV to check its contents.\n",
    "cat \"csv/numpy_売上.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'店舗': ['店舗1', '店舗2', '店舗3'], '最大': [75428, 24358, 67534], '最小': [21660, 13211, 43522], '平均': [47571, 17883, 54224], '範囲': [53768, 11147, 24012], '合計': [237856, 89418, 271121]}\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "import numpy as np\n",
    "\n",
    "# newline='' is what the csv module documents for files handed to csv.reader.\n",
    "with open('csv/numpy_売上.csv', mode=\"r\", encoding='utf-8', newline='') as fp:\n",
    "    csv_list = list(csv.reader(fp))\n",
    "\n",
    "# The first row is the header; everything after its first column is a store name.\n",
    "csv_head = csv_list[0]\n",
    "store_list = csv_head[1:]\n",
    "\n",
    "# The remaining rows hold the date followed by one sales figure per store.\n",
    "# csv.reader yields strings, so this is an array of strings.\n",
    "sale_list = csv_list[1:]\n",
    "sale_ndary = np.asanyarray(sale_list)\n",
    "\n",
    "# Drop the date column and convert the sales figures to integers once.\n",
    "# After this, column i of sales belongs to store_list[i].\n",
    "sales = sale_ndary[:, 1:].astype(np.int64)\n",
    "\n",
    "# Row order used for the output CSV.\n",
    "stat_names = [\"店舗\", \"合計\", \"平均\", \"最大\", \"最小\", \"範囲\"]\n",
    "store_statis_dict = {name: [] for name in stat_names}\n",
    "\n",
    "# Transposing lets zip pair each store name with its own column of sales.\n",
    "for store, store_sales in zip(store_list, sales.T):\n",
    "    store_statis_dict[\"店舗\"].append(store)\n",
    "    # int() stores plain Python ints, so the printed dict does not depend on\n",
    "    # how the installed NumPy version displays its scalar types.\n",
    "    store_statis_dict[\"合計\"].append(int(np.sum(store_sales)))\n",
    "    store_statis_dict[\"平均\"].append(int(np.mean(store_sales)))  # fraction dropped\n",
    "    store_statis_dict[\"最大\"].append(int(np.amax(store_sales)))\n",
    "    store_statis_dict[\"最小\"].append(int(np.amin(store_sales)))\n",
    "    store_statis_dict[\"範囲\"].append(int(np.ptp(store_sales)))\n",
    "print(store_statis_dict)\n",
    "\n",
    "# One CSV row per statistic: its name followed by one value per store.\n",
    "# Iterating stat_names (not the dict) fixes the row order even on Python\n",
    "# versions where dict iteration order is not guaranteed.\n",
    "csv_statis_list = [[name] + store_statis_dict[name] for name in stat_names]\n",
    "\n",
    "with open('csv/numpy_売上統計.csv', mode=\"w\", encoding='utf-8', newline='') as fp:\n",
    "    csv_writer = csv.writer(fp, lineterminator=\"\\n\")\n",
    "    csv_writer.writerows(csv_statis_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "店舗,店舗1,店舗2,店舗3\n",
      "最大,75428,24358,67534\n",
      "平均,47571,17883,54224\n",
      "合計,237856,89418,271121\n",
      "最小,21660,13211,43522\n",
      "範囲,53768,11147,24012\n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "# Print the statistics CSV to check its contents.\n",
    "cat \"csv/numpy_売上統計.csv\""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
